## ---- Functions ----
extract_qoi_med <- function(x, m, t) {
  tibble(
    qoi = "me",
    est = x$d.avg,
    se = sd(x$d.avg.sims),
    lower = x$d.avg.ci[1],
    upper = x$d.avg.ci[2],
    p = x$d.avg.p
  ) %>%
    dplyr::bind_rows(
      tibble(
        qoi = "de",
        est = x$z.avg,
        se = sd(x$z.avg.sims),
        lower = x$z.avg.ci[1],
        upper = x$z.avg.ci[2],
        p = x$z.avg.p
      )
    ) %>%
    dplyr::bind_rows(
      tibble(
        qoi = "te",
        est = x$tau.coef,
        se = sd(x$tau.sims),
        lower = x$tau.ci[1],
        upper = x$tau.ci[2],
        p = x$tau.p
      )
    )  %>%
    dplyr::bind_rows(
      tibble(
        qoi = "prop",
        est = x$n.avg,
        se = sd(x$n.avg.sims),
        lower = x$n.avg.ci[1],
        upper = x$n.avg.ci[2],
        p = x$n.avg.p
      )
    ) %>%
    dplyr::mutate(imp = m,
                  tertile = t)
}

extract_qoi_sens <- function(x, m, t) {
  tibble(
    qoi = "me",
    rho = x$rho,
    est = x$d0,
    lower = x$lower.d0,
    upper = x$upper.d0
  ) %>%
    dplyr::bind_rows(tibble(
      qoi = "de",
      rho = x$rho,
      est = x$z0,
      lower = x$lower.z0,
      upper = x$upper.z0
    )) %>%
    dplyr::mutate(
      r2_m = x$r.square.m,
      r2_y = x$r.square.y,
      imp = m,
      tertile = t
    )
}

## ---- Data ----
## SOEP data
load("dat/proc-data/soep_imp.RData")

## ---- Shortened TSCS: AfD analyses, 2014-2018 (demeaning) ----
## Variable selection for demeaning
dm_vars <- c(
  "afd",
  "cmr_arm",
  "cold_rent_sqm",
  "log_hinc_eq",
  "prop_personal_hinc",
  "hh_prop_ecact",
  "hh_mmb",
  "lm_part_Atypical",
  "lm_part_Inactive",
  "lm_part_Unemployed",
  "lm_part_In Education",
  "lm_part_Pensioners",
  "y14",
  "y15",
  "y16",
  "y17",
  "y18"
)

## Updated regional data
regional_data <- rio::import("../../soep-data/soep.v38/stata/regionl.dta") %>%
  dplyr::filter(kkz > 0) %>%
  dplyr::select(kkz,
                syear,
                kr_uemprate,
                kr_foreigner) %>%
  dplyr::rename(year = syear) %>%
  dplyr::mutate_at(
    .vars = vars(kr_uemprate,
                 kr_foreigner),
    .funs = ~ ifelse(. < 0, NA_real_, .)
  ) %>%
  dplyr::distinct() %>%
  dplyr::group_by(kkz, year) %>%
  dplyr::summarize_all(mean) %>%
  dplyr::ungroup()

## Data management
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    dplyr::mutate(y14 = as.numeric(year == 2014),
                  y15 = as.numeric(year == 2015),
                  y16 = as.numeric(year == 2016),
                  y17 = as.numeric(year == 2017),
                  y18 = as.numeric(year == 2018)) %>%
    dplyr::mutate(
      yrs_at_current_address = year - hh_at_current_address,
      age_at_current_address = age - yrs_at_current_address
    ) %>%
    dplyr::mutate(
      ltr_since_3 = as.numeric(yrs_at_current_address >= 3),
      ltr_since_5 = as.numeric(yrs_at_current_address >= 5)
    ) %>%
    filter(renter_no_rent == 0) %>%
    dplyr::select(
      id,
      hh_id,
      plz,
      kkz,
      year,
      year_fac,
      weight,
      starts_with("cold_rent_load"),
      starts_with("cold_rent_sqm"),
      wr_ecego,
      wr_immig,
      afd,
      vote_afd,
      owner,
      all_of(dm_vars),
      starts_with("rent"),
      starts_with("cmr_arm"),
      starts_with("log_hinc_eq"),
      starts_with("gtyp"),
      starts_with("prop_personal_hinc"),
      starts_with("hh_prop_ecact"),
      starts_with("hh_mmb"),
      starts_with("mover"),
      starts_with("lm_part"),
      starts_with("edu"),
      starts_with("age"),
      starts_with("east"),
      starts_with("fem"),
      starts_with("east"),
      starts_with("ltr_"),
      social_housing
    ) %>%
    dplyr::filter(not(is.na(rent_umn))) %>%
    dplyr::filter(not(is.na(rent_cwu))) %>%
    dplyr::filter(not(is.na(cmr_arm_umn))) %>%
    dplyr::filter(not(is.na(cmr_arm_cwu)))
}

## Local market data
load("dat/proc-data/plz5_f_und_b.RData")
plz5_f_und_b <- plz5_f_und_b %>%
  st_set_geometry(NULL) %>%
  filter(year %in% 2005:2018) %>%
  dplyr::select(plz, year, cmr_arm, rent) %>%
  distinct() %>%
  group_by(plz) %>%
  mutate(
    cmr_arm_2005 = mean(cmr_arm[year %in% 2005:2007], na.rm = TRUE),
    cmr_arm_2018 = mean(cmr_arm[year %in% 2016:2018], na.rm = TRUE),
    cmr_arm_chg = cmr_arm_2018 - cmr_arm_2005
  ) %>%
  mutate(
    rent_2005 = mean(rent[year %in% 2005:2007], na.rm = TRUE),
    rent_2018 = mean(rent[year %in% 2016:2018], na.rm = TRUE),
    rent_chg = rent_2018 - rent_2005,
    cmr_arm_chg = ifelse(is.na(cmr_arm_chg), rent_chg, cmr_arm_chg),
    cmr_arm = ifelse(is.na(cmr_arm), rent, cmr_arm)
  ) %>%
  ungroup() %>%
  group_by(year) %>%
  mutate(
    rent_cat =
      ifelse(
        year == 2004,
        NA_integer_,
        cut(
          rent,
          breaks = quantile(rent, c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
          labels = paste0("T", 1:3)
        )
      ),
    cmr_arm_cat =
      cut(
        cmr_arm,
        breaks = quantile(cmr_arm, c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
        labels = paste0("T", 1:3)
      )
  ) %>%
  ungroup() %>%
  dplyr::select(plz, cmr_arm_chg, rent_chg, rent_cat, cmr_arm_cat, year) %>%
  distinct() %>%
  na.omit() %>%
  mutate(
    cmr_arm_chg_cat =
      cut(
        cmr_arm_chg,
        breaks = quantile(cmr_arm_chg, c(0, 1 / 3, 2 / 3, 1)),
        labels = paste0("T", 1:3)
      ),
    rent_chg_cat =
      cut(
        rent_chg,
        breaks = quantile(rent_chg, c(0, 1 / 3, 2 / 3, 1)),
        labels = paste0("T", 1:3)
      )
  )

## Additional data management
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    # top code household rents per sqm
    mutate(cold_rent_sqm = case_when(cold_rent_sqm > 40 ~ 40,
                                     TRUE ~ cold_rent_sqm)) %>%
    group_by(year) %>%
    mutate(log_hinc_eq_cat =
             cut(
               log_hinc_eq,
               breaks = quantile(log_hinc_eq, c(0, 1 / 3, 2 / 3, 1)),
               labels = paste0("T", 1:3)
             )) %>%
    ungroup() %>%
    mutate(
      gtyp4 = case_when(
        gtyp %in% c(
          "[1] Kernstaedte>==gr.Verdraum",
          "[2] Kernstaedte<==gr.Verdraum",
          "[9] Kernstaedte<==Verdansatz"
        ) ~ "urban",
        gtyp %in% c(
          "[7] Mi-zent.=laendl=gr.Verdraum",
          "[10] Mi-zent.=verd.=Verdansatz",
          "[12] Mi-zent.=laendl=Verdansatz",
          "[14] Mi-zent.=verd.=laendl.",
          "[16] Mi-zent.=laendl=laendl."
        ) ~ "towns",
        gtyp %in% c(
          "[3] Mi-zent.=hochverd.=gr.Verdraum",
          "[5] Mi-zent.=verd.=gr.Verdraum",
          "[4] so.Gem.=hochverd.=gr.Verdraum",
          "[6] so.Gem.=verd.=gr.Verdraum"
        ) ~ "suburban",
        gtyp %in% c(
          "[8] so.Gem.=laendl=gr.Verdraum",
          "[11] so.Gem.=verd.=Verdansatz",
          "[13] so.Gem.=laendl=Verdansatz",
          "[15] so.Gem.=verd.=laendl.",
          "[17] so.Gem.=laendl=laendl."
        ) ~ "rural",
        TRUE ~ NA_character_
      ),
      gtyp3 = ifelse(gtyp4 == "towns", "rural", gtyp4)
    ) %>%
    left_join(plz5_f_und_b,
              by = c("plz", "year")) %>%
    dplyr::filter(weight > 0)
  
  ## Repair names
  names(soep_imp$imputations[[m]]) <-
    gsub(" ", "_", names(soep_imp$imputations[[m]]))
}

## Join county-level data, within-demeaning
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    dplyr::select(-dplyr::starts_with("kr_")) %>%
    dplyr::left_join(regional_data,
                     by = c("kkz", "year")) %>%
    dplyr::select(-ends_with("_cwu"),-ends_with("_cwu"))
}


## 2014:2018 Data for AfD support analyses
dm_vars <- gsub(" ", "_", dm_vars)
soep_afd <- soep_imp
soep_afd$imputations <-
  lapply(soep_afd$imputations, function (imp) {
    imp %>%
      dplyr::filter(year >= 2014) %>%
      dplyr::select(-dplyr::ends_with("_cwu")) %>%
      dplyr::select(-dplyr::ends_with("_umn")) %>%
      group_by(id) %>%
      mutate(log_hinc_eq_umn = mean(log_hinc_eq, na.rm = T)) %>%
      mutate(
        cmr_arm_cat = get_mode(cmr_arm_cat)[1],
        cmr_arm_chg_cat = get_mode(cmr_arm_chg_cat)[1]
      ) %>%
      ungroup() %>%
      mutate(log_hinc_eq_cat =
               cut(
                 log_hinc_eq_umn,
                 breaks = quantile(log_hinc_eq_umn, c(0, 1 / 3, 2 / 3, 1)),
                 labels = paste0("T", 1:3)
               ))
  })

## ---- Estimation -----
dir.create("est/mediation")

## Set up cluster for parallelized computation across imputations
cl <- makeCluster(length(soep_afd$imputations),
                  outfile = "est/mediation.txt",
                  type = "FORK")

## Marginal effects
med <- parLapply(cl,
                 seq_along(soep_afd$imputations),
                 function (m) {
                   ## ---- Seed ----
                   set.seed(20240503L)
                   
                   ## ---- T1 ----
                   dat1 <- soep_afd$imputations[[m]] %>%
                     filter(owner == 0) %>%
                     filter(log_hinc_eq_cat == "T1") %>%
                     filter(ltr_since_5 == 1) %>%
                     group_by(id) %>%
                     mutate_at(.vars = vars(all_of(dm_vars)),
                               .funs = list(
                                 umn = ~ mean(., na.rm = T),
                                 cwu = ~ . - mean(., na.rm = T)
                               )) %>%
                     ungroup()
                   
                   # Estimate models
                   mod1_1 <- lm(
                     afd_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       + year_fac,
                     data = dat1,
                     weights = weight
                   )
                   
                   mod2_1 <- lm(
                     cold_rent_sqm_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       + year_fac,
                     data = dat1,
                     weights = weight
                   )
                   
                   mod3_1 <- lm(
                     afd_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       year_fac +
                       cold_rent_sqm_cwu,
                     data = dat1,
                     weights = weight
                   )
                   
                   ## ---- Adjust df ----
                   df_adj <- length(unique(dat1$id)) - 1L
                   mod1_1$df.residual <- mod1_1$df.residual - df_adj
                   mod2_1$df.residual <- mod2_1$df.residual - df_adj
                   mod3_1$df.residual <- mod3_1$df.residual - df_adj

                   ## ---- T2 ----
                   dat2 <- soep_afd$imputations[[m]] %>%
                     filter(owner == 0) %>%
                     filter(log_hinc_eq_cat == "T2") %>%
                     filter(ltr_since_5 == 1) %>%
                     group_by(id) %>%
                     mutate_at(.vars = vars(all_of(dm_vars)),
                               .funs = list(
                                 umn = ~ mean(., na.rm = T),
                                 cwu = ~ . - mean(., na.rm = T)
                               )) %>%
                     ungroup()
                   
                   # Estimate models
                   mod1_2 <- lm(
                     afd_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       + year_fac,
                     data = dat2,
                     weights = weight
                   )
                   
                   mod2_2 <- lm(
                     cold_rent_sqm_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       + year_fac,
                     data = dat2,
                     weights = weight
                   )
                   
                   mod3_2 <- lm(
                     afd_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       year_fac +
                       cold_rent_sqm_cwu,
                     data = dat2,
                     weights = weight
                   )
                   
                   ## ---- Adjust df ----
                   df_adj <- length(unique(dat2$id)) - 1L
                   mod1_2$df.residual <- mod1_2$df.residual - df_adj
                   mod2_2$df.residual <- mod2_2$df.residual - df_adj
                   mod3_2$df.residual <- mod3_2$df.residual - df_adj
                   
                   ## ---- T3 ----
                   dat3 <- soep_afd$imputations[[m]] %>%
                     filter(owner == 0) %>%
                     filter(log_hinc_eq_cat == "T3") %>%
                     filter(ltr_since_5 == 1) %>%
                     group_by(id) %>%
                     mutate_at(.vars = vars(all_of(dm_vars)),
                               .funs = list(
                                 umn = ~ mean(., na.rm = T),
                                 cwu = ~ . - mean(., na.rm = T)
                               )) %>%
                     ungroup()
                   
                   # Estimate models
                   mod1_3 <- lm(
                     afd_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       + year_fac,
                     data = dat3,
                     weights = weight
                   )
                   
                   mod2_3 <- lm(
                     cold_rent_sqm_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       + year_fac,
                     data = dat3,
                     weights = weight
                   )
                   
                   mod3_3 <- lm(
                     afd_cwu ~
                       cmr_arm_cwu +
                       log_hinc_eq_cwu +
                       prop_personal_hinc_cwu +
                       hh_prop_ecact_cwu +
                       hh_mmb_cwu +
                       lm_part_Atypical_cwu +
                       lm_part_Inactive_cwu +
                       lm_part_Unemployed_cwu +
                       lm_part_In_Education_cwu +
                       lm_part_Pensioners_cwu +
                       year_fac +
                       cold_rent_sqm_cwu,
                     data = dat3,
                     weights = weight
                   )
                   
                   ## ---- Adjust df for within-adjustment ----
                   df_adj <- length(unique(dat3$id)) - 1L
                   mod1_3$df.residual <- mod1_3$df.residual - df_adj
                   mod2_3$df.residual <- mod2_3$df.residual - df_adj
                   mod3_3$df.residual <- mod3_3$df.residual - df_adj

                   ## ---- Mediation analyses ----
                   ## Analyses
                   med1 <- mediation::mediate(
                     model.m = mod2_1,
                     model.y = mod3_1,
                     treat = "cmr_arm_cwu",
                     mediator = "cold_rent_sqm_cwu"
                   )
                   
                   med2 <- mediation::mediate(
                     model.m = mod2_2,
                     model.y = mod3_2,
                     treat = "cmr_arm_cwu",
                     mediator = "cold_rent_sqm_cwu"
                   )
                   
                   med3 <- mediation::mediate(
                     model.m = mod2_3,
                     model.y = mod3_3,
                     treat = "cmr_arm_cwu",
                     mediator = "cold_rent_sqm_cwu"
                   )
                   
                   ## ---- Sensitivity analyses ----
                   sens1 <- mediation::medsens(med1,
                                               rho.by = 0.025,
                                               effect.type = "both")
                   
                   sens2 <- mediation::medsens(med2,
                                               rho.by = 0.025,
                                               effect.type = "both")
                   
                   sens3 <- mediation::medsens(med3,
                                               rho.by = 0.025,
                                               effect.type = "both")
                   
                   ## ---- Extraction ----
                   out_med <- extract_qoi_med(med1, m, 1) %>%
                     dplyr::bind_rows(extract_qoi_med(med2, m, 2)) %>%
                     dplyr::bind_rows(extract_qoi_med(med3, m, 3))
                   
                   out_sens <- extract_qoi_sens(sens1, m, 1) %>%
                     dplyr::bind_rows(extract_qoi_sens(sens2, m, 2)) %>%
                     dplyr::bind_rows(extract_qoi_sens(sens3, m, 3))
                   
                   ## ---- Return value ----
                   return(
                     list(
                       med = out_med,
                       sens = out_sens
                     )
                   )
                 })

## Exit parallel computation
stopCluster(cl)

## ---- Combine estimates ----
med_sum <- lapply(med, function(x) x$med) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(tertile, qoi, imp)

sens_sum <- lapply(med, function(x) x$sens) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(tertile, qoi, imp, rho)

## ---- Export as CSV ----
med_sum %>%
  write.csv(paste0("est/mediation_results.csv"))

sens_sum %>%
  write.csv(paste0("est/mediation_sensitivity.csv"))
